rm(list=ls()) 
print(Sys.time())
# load libraries
library(stringr)
library(data.table)
library(lubridate)
library(tictoc)
library(dplyr)
library(dtplyr)
library(zoo)
library(openxlsx)
library(taRifx)
library(haven)
library(sandwich)
library(lmtest)

options(max.print=999999)



# Sets the current path
current_path <- "XXX"
setwd(current_path)


#put bond_master, Updated2020_FISD_Issue/rating_date.csv, and raw EOD data under the same directory
#setwd("~/Corporate_Bond_TRACE/data/TRACE_EOD/Raw_Data")
bond_master<-fread("bond_master.csv")
FISD_fundamentals<-fread("Updated2020_FISD_Issue.csv")
DT_bondratings<-fread("Updated2020_FISD_rating_data.csv")

dir_list_EOD_MPPBTDSS<-list.files(pattern = "^MPPBTDSS_CUSIP_2020")

AllCombine_EOD_MPPBTDSS<-
  Reduce(rbind, lapply(dir_list_EOD_MPPBTDSS, fread))

fwrite(AllCombine_EOD_MPPBTDSS,"RawData_AllCombine_EOD_MPPBTDSS.csv")

AllCombine_EOD_MPPBTDSS<-fread("RawData_AllCombine_EOD_MPPBTDSS.csv")

setnames(AllCombine_EOD_MPPBTDSS,tolower(colnames(AllCombine_EOD_MPPBTDSS)))

print(table(AllCombine_EOD_MPPBTDSS$message_category))

TradeData<-AllCombine_EOD_MPPBTDSS[message_category=="T"]
AdministrativeData<-AllCombine_EOD_MPPBTDSS[message_category=="A"]
print(nrow(TradeData))
print(nrow(AdministrativeData))

COLNAMES<-c(colnames(AllCombine_EOD_MPPBTDSS))
COLNAMES_T_DATA<-COLNAMES[1:60]
COLNAMES_A_DATA<-COLNAMES[61:length(COLNAMES)]

TradeData<-TradeData[,c(COLNAMES_T_DATA),with=FALSE]
AdministrativeData<-AdministrativeData[,c("execution_date_time","date_time","message_type",COLNAMES_A_DATA),with=FALSE]
AdministrativeData[,`:=`(trd_exctn_dt_s=substr(date_time,1,10))]

###################################################
###check whether EOD data provides market sentiments
AdministrativeData<-AdministrativeData[trd_exctn_dt_s=="2020-04-16"]
AdministrativeData<-AdministrativeData[!(message_type %in% c("E","M","H","N","O"))]
###################################################

#[1]Dick-Nelson filter
print(table(TradeData$message_type))
TradeData[,`:=`(Status="")]
TradeData[message_type=="M",`:=`(Status="Trade")]
TradeData[message_type=="N",`:=`(Status="Cancel")]
TradeData[message_type=="O",`:=`(Status="Correction")]
print(table(TradeData$Status))

###check duplicates
TradeData[,`:=`(checkduplicate=.N),
          by=c(colnames(TradeData))]
table(TradeData$checkduplicate)
TradeData[,`:=`(checkduplicate=NULL)]

TradeData[,`:=`(trd_exctn_dt_s=substr(execution_date_time,1,10))]
setcolorder(TradeData,c("execution_date_time","trd_exctn_dt_s"))

TradeData_Cancel<-TradeData[Status=="Cancel"]
TradeData_Correction<-TradeData[Status=="Correction"]
TradeData_Cancel[,`:=`(message_sequence_number=NULL)]
TradeData_Correction[,`:=`(message_sequence_number=NULL)]
setnames(TradeData_Cancel,c("original_message_sequence_number"),c("message_sequence_number"))
setnames(TradeData_Correction,c("original_message_sequence_number"),c("message_sequence_number"))
TradeData_Trade<-TradeData[Status=="Trade"]
TradeData_Trade[,`:=`(original_message_sequence_number=NULL)]

nrow(TradeData_Trade)
nrow(TradeData_Cancel)
TradeData_Trade<-
  TradeData_Trade[!TradeData_Cancel, on=c("message_sequence_number","cusip","trd_exctn_dt_s")]
nrow(TradeData_Trade)

nrow(TradeData_Correction)


TradeData_Trade<-
  TradeData_Trade[!TradeData_Correction, on=c("message_sequence_number","trd_exctn_dt_s","cusip")]
nrow(TradeData_Trade)

TradeData_filter<-rbind(TradeData_Trade,
                        TradeData_Correction)
TradeData_filter[,`:=`(message_sequence_number=NULL)]

setnames(TradeData_filter,
         c("cusip","date_time","quantity","price","reporting_party_type"),
         c("cusip_id","dissemination_date","Quantity(ParVal)","rptd_pr","rptg_party_type"))

TradeData_filter[,`:=`(`Quantity(ParVal)_num`=as.numeric(`Quantity(ParVal)`))]
TradeData_filter[,`:=`(`dum_Quantity(ParVal)_5MM`=0,
                       `dum_Quantity(ParVal)_1MM`=0)]
TradeData_filter[`Quantity(ParVal)`=="5MM+",
                 `:=`(`dum_Quantity(ParVal)_5MM`=1)]
TradeData_filter[`Quantity(ParVal)`=="1MM+",
                 `:=`(`dum_Quantity(ParVal)_1MM`=1)]

print(nrow(TradeData_filter[is.na(`Quantity(ParVal)_num`)]))
print(nrow(TradeData_filter[`dum_Quantity(ParVal)_5MM`==1|`dum_Quantity(ParVal)_1MM`==1]))                                

TradeData_filter[,`:=`(trade_type="")]

print(table(TradeData_filter$rptg_party_type))
print(table(TradeData_filter$contra_party_type))

###There are only S trades for DD trades.
print(table(as.matrix(TradeData_filter[((rptg_party_type=="D")|(rptg_party_type=="T"))&
                                         ((contra_party_type=="D")|(contra_party_type=="T")),
                                       c("side"),
                                       with=FALSE])))

###"A" is treated as customer "C"; "T" is treated as FINRA member.
print(table(as.matrix(TradeData_filter[((rptg_party_type=="D")|(rptg_party_type=="T"))&
                                         (contra_party_type=="A"),
                                       c("side"),
                                       with=FALSE])))
###"T" means ATS trades
print(table(as.matrix(TradeData_filter[(rptg_party_type=="T")|
                                         (contra_party_type=="T"),
                                       c("ats_indicator"),
                                       with=FALSE])))

###Why are there still ATS==Y? because some member firms report on behalf of ATS or ATS is reporting on behalf of others?
print(table(as.matrix(TradeData_filter[(rptg_party_type!="T")&
                                         (contra_party_type!="T"),
                                       c("ats_indicator"),
                                       with=FALSE])))

TradeData_filter[(contra_party_type=="D"|contra_party_type=="T")&
                   (side=="S"),`:=`(trade_type="D2D")]
TradeData_filter[(contra_party_type=="C"|contra_party_type=="A")&
                   (side=="S"),`:=`(trade_type="D2C")]
TradeData_filter[(contra_party_type=="C"|contra_party_type=="A")&
                   (side=="B"),`:=`(trade_type="C2D")]

print(table(TradeData_filter$trade_type))
# ###no party type trades.
# print(table(TradeData_filter[trade_type=="",
#                                       c("Contra Party Type","Reporting Party Type","side"),
#                                       with=FALSE]))

###for trade_type=="", we can use Remuneration and side to decide whether they are DC or DD.
###Remuneration="" only for DD trades and also DD trades cannot be B.
###For Remuneration!="", then can be DC/DA trades.
# TradeData_filter[(trade_type=="")&
#                             (side=="S")&
#                             (Remuneration==""),
#                           `:=`(rptg_party_type="D",
#                                contra_party_type="D",
#                                trade_type="D2D")]
# TradeData_filter[(trade_type=="")&
#                             (Remuneration!="")&
#                             (side=="S"),
#                           `:=`(rptg_party_type="D",
#                                contra_party_type="DC(A)",
#                                trade_type="D2C")]
# TradeData_filter[(trade_type=="")&
#                             (Remuneration!="")&
#                             (side=="B"),
#                           `:=`(rptg_party_type="D",
#                                contra_party_type="DC(A)",
#                                trade_type="C2D")]
# print(table(TradeData_filter[,
#                                       c("trade_type"),
#                                       with=FALSE]))

print(summary(TradeData_filter$rptd_pr))
print(summary(as.matrix(TradeData_filter[!is.na(`Quantity(ParVal)_num`),
                                         c("Quantity(ParVal)_num"),
                                         with=FALSE])))

message(paste0("number of obs in combined trade data is ",nrow(TradeData_filter)))
message(" ")
message(paste0("number of bonds is ",
               nrow(unique(as.matrix(TradeData_filter[,c("cusip_id"),with=FALSE])))))
message(" ")

#########################
#[2]Merge with fundamentals
###[2-1]bond_master
setnames(bond_master,c("bond_sym_id","bsym_id","sub_prdct_type"),
         c("symbol","bsym","sub_product_type"))
merge_index<-c("cusip_id","symbol","bsym","sub_product_type")
bond_master[,`:=`(count_seq_cusip=sequence(.N)),
            by=c(merge_index)]
summary(bond_master$count_seq_cusip)
bond_master[,`:=`(num_grade=0)]
bond_master[grade=="I",`:=`(num_grade=1)]
bond_master[,`:=`(mean_num_grade=mean(num_grade,na.rm = TRUE),
                  mean_cpn_rt=mean(cpn_rt,na.rm = TRUE)),
            by=c(merge_index)]
summary(bond_master$mean_num_grade)

bond_master<-bond_master[count_seq_cusip==1]
bond_master[,`:=`(mean_grade="")]
bond_master[mean_num_grade>=0.66,`:=`(mean_grade="I")]
bond_master[mean_num_grade<0.66,`:=`(mean_grade="H")]

table(bond_master$mean_grade)

nrow(TradeData_filter)
TradeData_filter_masterfundamental<-left_join(TradeData_filter,
                            bond_master,
                            by=c(merge_index))
TradeData_filter_masterfundamental[,`:=`(num_grade=NULL)]
nrow(TradeData_filter_masterfundamental)

TradeData_filter_masterfundamental<-as.data.table(TradeData_filter_masterfundamental)

table(TradeData_filter_masterfundamental$mean_grade)
summary(TradeData_filter_masterfundamental$count_seq_cusip)

###[2-2]FISD_fundamentals
setnames(FISD_fundamentals,tolower(colnames(FISD_fundamentals)))
setnames(FISD_fundamentals,
         c("complete_cusip"),
         c("cusip_id"))

###FISD unique by cusip_id and maturity or simply cusip_id
FISD_fundamentals[,`:=`(count_seq=sequence(.N)),
                  by=c("cusip_id","maturity")]
table(FISD_fundamentals$count_seq)

FISD_fundamentals[,`:=`(mtrty_dt_s=as.Date(paste0(substr(as.character(maturity),1,4),"-",
                                                  substr(as.character(maturity),5,6),"-",
                                                  substr(as.character(maturity),7,8))))]

setcolorder(FISD_fundamentals,c("cusip_id","amount_outstanding","offering_amt","offering_date","maturity","mtrty_dt_s"))

FISD_fundamentals_variablelist<-
  c("cusip_id","amount_outstanding","offering_amt","offering_date","maturity","mtrty_dt_s",
    "issue_id","issuer_id","prospectus_issuer_name","issuer_cusip","issue_cusip","issue_name",
    "canadian", "oid", "foreign_currency", "slob",
    "convertible","yankee","rule_144a","treasury_spread","treasury_maturity","denomination","isin","perpetual","sec_cusip",
    "industry_group","industry_code","sic_code","naics_code","country_domicile")

FISD_fundamentals<-
  FISD_fundamentals[,c(FISD_fundamentals_variablelist),with=FALSE]

rm(Trial)
nrow(TradeData_filter_masterfundamental)
Trial<-
  as.data.table(left_join(TradeData_filter_masterfundamental,
            FISD_fundamentals,
            by=c("cusip_id")))
nrow(Trial)
Trial<-as.data.table(Trial)
setcolorder(Trial,
            c("mtrty_dt_s","offering_date","maturity","amount_outstanding","offering_amt"))

colnames(Trial)
summary(Trial$offering_amt)
summary(Trial$amount_outstanding)

nrow(Trial)
Trial<-as.data.table(Trial)
TradeData_filter_masterfundamental_FISDscrapefundamentals<-Trial


TradeData_filter_masterfundamental_FISDscrapefundamentals<-
  TradeData_filter_masterfundamental_FISDscrapefundamentals[!(cusip_id==" "|cusip_id==""|is.na(cusip_id))]

###mtrty_dt is from master file which has the lowest missing rate.
print(nrow(TradeData_filter_masterfundamental_FISDscrapefundamentals[is.na(mtrty_dt)]))

TradeData_filter_masterfundamental_FISDscrapefundamentals[,`:=`(mtrty_dt_s=as.Date(paste0(substr(as.character(mtrty_dt),1,4),"-",
                                                                                          substr(as.character(mtrty_dt),5,6),"-",
                                                                                          substr(as.character(mtrty_dt),7,8))))]
print(nrow(TradeData_filter_masterfundamental_FISDscrapefundamentals[is.na(mtrty_dt_s)]))

###for offeringdate and amountoutstanding
TradeData_filter_masterfundamental_FISDscrapefundamentals[!is.na(offering_date),`:=`(offering_date_s=as.Date(paste0(substr(as.character(offering_date),1,4),"-",
                                                                                          substr(as.character(offering_date),5,6),"-",
                                                                                          substr(as.character(offering_date),7,8))))]

print(nrow(TradeData_filter_masterfundamental_FISDscrapefundamentals[is.na(offering_date_s)]))

TradeData_filter_masterfundamental_FISDscrapefundamentals[,`:=`(offering_dt_s=offering_date_s)]

summary(TradeData_filter_masterfundamental_FISDscrapefundamentals$amount_outstanding)
summary(TradeData_filter_masterfundamental_FISDscrapefundamentals$offering_amt)

print(quantile(TradeData_filter_masterfundamental_FISDscrapefundamentals$amount_outstanding,probs = 0.001,na.rm = TRUE))
print(quantile(TradeData_filter_masterfundamental_FISDscrapefundamentals$offering_amt,probs = 0.001,na.rm = TRUE))

TradeData_filter_masterfundamental_FISDscrapefundamentals[,`:=`(ins_amount_outstanding=amount_outstanding)]
summary(TradeData_filter_masterfundamental_FISDscrapefundamentals$ins_amount_outstanding)
TradeData_filter_masterfundamental_FISDscrapefundamentals[is.na(ins_amount_outstanding)|(ins_amount_outstanding==0),
                                                          `:=`(ins_amount_outstanding=offering_amt)]
summary(TradeData_filter_masterfundamental_FISDscrapefundamentals$ins_amount_outstanding)


###We finally use mtrty_dt_s, offering_dt_s and ins_amount_outstanding (thousands)
TradeData_filter_masterfundamental_FISDscrapefundamentals[,time_to_maturity:=0]
TradeData_filter_masterfundamental_FISDscrapefundamentals[,
                 time_to_maturity:=as.integer(as.Date(mtrty_dt_s)-as.Date(trd_exctn_dt_s))]

TradeData_filter_masterfundamental_FISDscrapefundamentals[,time_to_offering:=0]
TradeData_filter_masterfundamental_FISDscrapefundamentals[,
                 time_to_offering:=as.integer(as.Date(trd_exctn_dt_s)-as.Date(offering_dt_s))]

print(summary(TradeData_filter_masterfundamental_FISDscrapefundamentals$time_to_maturity))
print(summary(TradeData_filter_masterfundamental_FISDscrapefundamentals$time_to_offering))
print(nrow(TradeData_filter_masterfundamental_FISDscrapefundamentals[is.na(time_to_offering)])/
        nrow(TradeData_filter_masterfundamental_FISDscrapefundamentals))
print(nrow(TradeData_filter_masterfundamental_FISDscrapefundamentals[is.na(time_to_maturity)])/
        nrow(TradeData_filter_masterfundamental_FISDscrapefundamentals))


setcolorder(TradeData_filter_masterfundamental_FISDscrapefundamentals,
            c("time_to_maturity","time_to_offering"))

fwrite(TradeData_filter_masterfundamental_FISDscrapefundamentals,
       "TRACE_EOD_TradeData_filter_masterfundamental_FISDscrapefundamentals.csv")


###################################################
TradeData_filter_masterfundamental_FISDscrapefundamentals<-
  fread("TRACE_EOD_TradeData_filter_masterfundamental_FISDscrapefundamentals.csv")

###[3]Merge with FISD credit rating
print(nrow(TradeData_filter_masterfundamental_FISDscrapefundamentals[is.na(mean_grade)])
      /nrow(TradeData_filter_masterfundamental_FISDscrapefundamentals))

FISD_identifier<-c("issue_id","issuer_id","issuer_cusip",
                   "issue_cusip","cusip_id","prospectus_issuer_name",
                   "issue_name","mtrty_dt","offering_dt")

setnames(DT_bondratings,tolower(colnames(DT_bondratings)))

setnames(DT_bondratings,
         c("complete_cusip"),
         c("cusip_id"))
DT_bondratings[,`:=`(mtrty_dt=as.character(maturity),
                     offering_dt=as.character(offering_date),
                     rating_dt=as.character(rating_date))]

#drop duplicates by "rating_type", "rating_date" and FISD_identifier
DT_bondratings[,`:=`(count=.N,count_seq=sequence(.N)),
               by=c("rating_type","rating_date",FISD_identifier)]
table(DT_bondratings$count_seq)
DT_bondratings<-DT_bondratings[!count_seq>1]

###run codes to assign values to different ratings
source("assign_val_rating.R")

print(table(DT_bondratings$mean_rating))
print(table(DT_bondratings$max_investment_grade))

DT_bondratings<-DT_bondratings[,c("cusip_id","mtrty_dt","mean_rating_code",
                                  "rating_dt"),
                               with=FALSE]

DT_bondratings[,`:=`(mtrty_dt_new=paste0(substr(mtrty_dt,1,4),"-",substr(mtrty_dt,5,6),"-",
                                         substr(mtrty_dt,7,8)))]
DT_bondratings[,`:=`(rating_dt_new=paste0(substr(rating_dt,1,4),"-",substr(rating_dt,5,6),"-",
                                          substr(rating_dt,7,8)))]
DT_bondratings[,`:=`(mtrty_dt=NULL)]
DT_bondratings[,`:=`(rating_dt=NULL)]
setnames(DT_bondratings,c("mtrty_dt_new"),c("mtrty_dt_s"))
setnames(DT_bondratings,c("rating_dt_new"),c("rating_dt"))

DT_bondratings_identifier<-as.data.table(unique(as.matrix(DT_bondratings[,c("cusip_id","mtrty_dt_s"),
                                                                         with=FALSE])))

setnames(DT_bondratings,
         c("rating_dt","mean_rating_code"),c("DATE","new_mean_rating_code"))
###make sure cusip_id+mtrty_dt_s+DATE (rating_dt) uniquely identfies credit rating in DT_bondratings
print(nrow(DT_bondratings))
DT_bondratings<-unique(DT_bondratings)
print(nrow(DT_bondratings))

DT_bondratings[,`:=`(count_seq=sequence(.N)),
               by=c("cusip_id","mtrty_dt_s","DATE")]
print(table(DT_bondratings$count_seq))

nrow(unique(as.matrix(DT_bondratings[,c("cusip_id","mtrty_dt_s","DATE"),
                                     with=FALSE])))

TradeData_filter_masterfundamental_FISDscrapefundamentals[,`:=`(DATE=as.character(trd_exctn_dt_s))]
DT_bondratings[mtrty_dt_s=="NA-NA-NA",`:=`(mtrty_dt_s=NA)]
DT_bondratings[,`:=`(mtrty_dt_s=as.character(mtrty_dt_s))]

print(nrow(TradeData_filter_masterfundamental_FISDscrapefundamentals))

DT_bondratings[,`:=`(mtrty_dt_s=as.Date(mtrty_dt_s))]

fwrite(DT_bondratings,"TRACE_EOD_DT_bondratings.csv")

TradeData_filter_masterfundamental_FISDscrapefundamentals[,`:=`(DATE=as.character(trd_exctn_dt_s))]
DT_bondratings[,`:=`(DATE=as.character(DATE))]

TradeData_filter_masterfundamental_FISDscrapefundamentalrating_mostrecentFISDrating<-
  full_join(TradeData_filter_masterfundamental_FISDscrapefundamentals,
            DT_bondratings,
            by=c("cusip_id","mtrty_dt_s","DATE"))

TradeData_filter_masterfundamental_FISDscrapefundamentalrating_mostrecentFISDrating<-
  as.data.table(TradeData_filter_masterfundamental_FISDscrapefundamentalrating_mostrecentFISDrating)

TradeData_filter_masterfundamental_FISDscrapefundamentalrating_mostrecentFISDrating<-
  TradeData_filter_masterfundamental_FISDscrapefundamentalrating_mostrecentFISDrating[order(cusip_id,mtrty_dt_s,DATE)]

nrow(TradeData_filter_masterfundamental_FISDscrapefundamentalrating_mostrecentFISDrating[is.na(new_mean_rating_code)])

class(TradeData_filter_masterfundamental_FISDscrapefundamentalrating_mostrecentFISDrating$new_mean_rating_code)
nrow(TradeData_filter_masterfundamental_FISDscrapefundamentalrating_mostrecentFISDrating[is.nan(new_mean_rating_code)])
TradeData_filter_masterfundamental_FISDscrapefundamentalrating_mostrecentFISDrating[is.nan(new_mean_rating_code),`:=`(new_mean_rating_code=NA)]

TradeData_filter_masterfundamental_FISDscrapefundamentalrating_mostrecentFISDrating[,
                                                                                    `:=`(new_mean_rating_code=na.locf(new_mean_rating_code,na.rm = FALSE)),
                                                                                    by=c("cusip_id","mtrty_dt_s")]

nrow(TradeData_filter_masterfundamental_FISDscrapefundamentalrating_mostrecentFISDrating[is.na(new_mean_rating_code)])

TradeData_filter_masterfundamental_FISDscrapefundamentalrating_mostrecentFISDrating[,
                                                                                    new_mean_rating_code:=na.locf(new_mean_rating_code,na.rm = FALSE,fromLast = TRUE),
                                                                                    by=c("cusip_id","mtrty_dt_s")]
nrow(TradeData_filter_masterfundamental_FISDscrapefundamentalrating_mostrecentFISDrating[is.na(new_mean_rating_code)])

table(TradeData_filter_masterfundamental_FISDscrapefundamentalrating_mostrecentFISDrating$mean_rating)
summary(TradeData_filter_masterfundamental_FISDscrapefundamentalrating_mostrecentFISDrating$new_mean_rating_code)


TradeData_filter_masterfundamental_FISDscrapefundamentalrating_mostrecentFISDrating[,`:=`(new_mean_rating="")]

TradeData_filter_masterfundamental_FISDscrapefundamentalrating_mostrecentFISDrating[new_mean_rating_code>5.5&new_mean_rating_code<=6.5,new_mean_rating:="A"]
TradeData_filter_masterfundamental_FISDscrapefundamentalrating_mostrecentFISDrating[new_mean_rating_code>4.5&new_mean_rating_code<=5.5,new_mean_rating:="A+"]
TradeData_filter_masterfundamental_FISDscrapefundamentalrating_mostrecentFISDrating[new_mean_rating_code>6.5&new_mean_rating_code<=7.5,new_mean_rating:="A-"]
TradeData_filter_masterfundamental_FISDscrapefundamentalrating_mostrecentFISDrating[new_mean_rating_code>2.5&new_mean_rating_code<=3.5,new_mean_rating:="AA"]
TradeData_filter_masterfundamental_FISDscrapefundamentalrating_mostrecentFISDrating[new_mean_rating_code>1.5&new_mean_rating_code<=2.5,new_mean_rating:="AA+"]
TradeData_filter_masterfundamental_FISDscrapefundamentalrating_mostrecentFISDrating[new_mean_rating_code>3.5&new_mean_rating_code<=4.5,new_mean_rating:="AA-"]
TradeData_filter_masterfundamental_FISDscrapefundamentalrating_mostrecentFISDrating[new_mean_rating_code<=1.5,new_mean_rating:="AAA"]
TradeData_filter_masterfundamental_FISDscrapefundamentalrating_mostrecentFISDrating[new_mean_rating_code>14.5&new_mean_rating_code<=15.5,new_mean_rating:="B"]
TradeData_filter_masterfundamental_FISDscrapefundamentalrating_mostrecentFISDrating[new_mean_rating_code>13.5&new_mean_rating_code<=14.5,new_mean_rating:="B+"]
TradeData_filter_masterfundamental_FISDscrapefundamentalrating_mostrecentFISDrating[new_mean_rating_code>15.5&new_mean_rating_code<=16.5,new_mean_rating:="B-"]
TradeData_filter_masterfundamental_FISDscrapefundamentalrating_mostrecentFISDrating[new_mean_rating_code>11.5&new_mean_rating_code<=12.5,new_mean_rating:="BB"]
TradeData_filter_masterfundamental_FISDscrapefundamentalrating_mostrecentFISDrating[new_mean_rating_code>10.5&new_mean_rating_code<=11.5,new_mean_rating:="BB+"]
TradeData_filter_masterfundamental_FISDscrapefundamentalrating_mostrecentFISDrating[new_mean_rating_code>12.5&new_mean_rating_code<=13.5,new_mean_rating:="BB-"]
TradeData_filter_masterfundamental_FISDscrapefundamentalrating_mostrecentFISDrating[new_mean_rating_code>8.5&new_mean_rating_code<=9.5,new_mean_rating:="BBB"]
TradeData_filter_masterfundamental_FISDscrapefundamentalrating_mostrecentFISDrating[new_mean_rating_code>7.5&new_mean_rating_code<=8.5,new_mean_rating:="BBB+"]
TradeData_filter_masterfundamental_FISDscrapefundamentalrating_mostrecentFISDrating[new_mean_rating_code>9.5&new_mean_rating_code<=10.5,new_mean_rating:="BBB-"]
TradeData_filter_masterfundamental_FISDscrapefundamentalrating_mostrecentFISDrating[new_mean_rating_code>20.5&new_mean_rating_code<=21.5,new_mean_rating:="C"]
TradeData_filter_masterfundamental_FISDscrapefundamentalrating_mostrecentFISDrating[new_mean_rating_code>19.5&new_mean_rating_code<=20.5,new_mean_rating:="CC"]
TradeData_filter_masterfundamental_FISDscrapefundamentalrating_mostrecentFISDrating[new_mean_rating_code>17.5&new_mean_rating_code<=18.5,new_mean_rating:="CCC"]
TradeData_filter_masterfundamental_FISDscrapefundamentalrating_mostrecentFISDrating[new_mean_rating_code>16.5&new_mean_rating_code<=17.5,new_mean_rating:="CCC+"]
TradeData_filter_masterfundamental_FISDscrapefundamentalrating_mostrecentFISDrating[new_mean_rating_code>18.5&new_mean_rating_code<=19.5,new_mean_rating:="CCC-"]
TradeData_filter_masterfundamental_FISDscrapefundamentalrating_mostrecentFISDrating[new_mean_rating_code>24.5&new_mean_rating_code<=25.5,new_mean_rating:="D"]
TradeData_filter_masterfundamental_FISDscrapefundamentalrating_mostrecentFISDrating[is.na(new_mean_rating_code),new_mean_rating:="NR"]

table(TradeData_filter_masterfundamental_FISDscrapefundamentalrating_mostrecentFISDrating$new_mean_rating)

print(nrow(TradeData_filter_masterfundamental_FISDscrapefundamentalrating_mostrecentFISDrating))
#bonds not exist in FINRA trade data have "Execution Date" missing
TradeData_filter_masterfundamental_FISDscrapefundamentalrating_mostrecentFISDrating<-
  na.omit(TradeData_filter_masterfundamental_FISDscrapefundamentalrating_mostrecentFISDrating,cols="trd_exctn_dt_s")
print(nrow(TradeData_filter_masterfundamental_FISDscrapefundamentalrating_mostrecentFISDrating))
print(nrow(TradeData_filter_masterfundamental_FISDscrapefundamentals))

print(table(TradeData_filter_masterfundamental_FISDscrapefundamentalrating_mostrecentFISDrating$new_mean_rating))

summary(TradeData_filter_masterfundamental_FISDscrapefundamentalrating_mostrecentFISDrating$new_mean_rating_code)

print(message(paste0("proportion of transactions of non-rated bonds is ",
                     nrow(TradeData_filter_masterfundamental_FISDscrapefundamentalrating_mostrecentFISDrating[is.na(new_mean_rating_code)])/nrow(TradeData_filter_masterfundamental_FISDscrapefundamentalrating_mostrecentFISDrating))))

TradeData_filter_masterfundamental_FISDscrapefundamentalrating_mostrecentFISDrating[,`:=`(count_seq.x=NULL,
                                                                                          count_seq.y=NULL)]

print(table(TradeData_filter_masterfundamental_FISDscrapefundamentalrating_mostrecentFISDrating$mean_grade))
TradeData_filter_masterfundamental_FISDscrapefundamentalrating_mostrecentFISDrating<-
  as.data.table(TradeData_filter_masterfundamental_FISDscrapefundamentalrating_mostrecentFISDrating)

TradeData_filter_masterfundamental_FISDscrapefundamentalrating_mostrecentFISDrating[,`:=`(new_mean_grade=mean_grade)]
print(table(TradeData_filter_masterfundamental_FISDscrapefundamentalrating_mostrecentFISDrating$new_mean_grade))
TradeData_filter_masterfundamental_FISDscrapefundamentalrating_mostrecentFISDrating[(!(new_mean_grade %in% c("H","I")))&
                                                                                      (new_mean_rating %in% c("A","A-","A+","AA","AA-",
                                                                                                              "AA+","AAA",
                                                                                                              "BBB+","BBB","BBB-")),
                                                                                    `:=`(new_mean_grade="I")]
print(table(TradeData_filter_masterfundamental_FISDscrapefundamentalrating_mostrecentFISDrating$new_mean_grade))
TradeData_filter_masterfundamental_FISDscrapefundamentalrating_mostrecentFISDrating[(!(new_mean_grade %in% c("H","I")))&
                                                                                      (new_mean_rating %in% c("B","B-","B+","BB","BB-","BB+",
                                                                                                              "C","CC","CCC","CCC-","CCC+","D")),
                                                                                    `:=`(new_mean_grade="H")]
print(table(TradeData_filter_masterfundamental_FISDscrapefundamentalrating_mostrecentFISDrating$new_mean_grade))


fwrite(TradeData_filter_masterfundamental_FISDscrapefundamentalrating_mostrecentFISDrating,
       "TRACE_EOD_TradeData_filter_masterfundamental_FISDscrapefundamentals_mostrecentFISDrating.csv")


##########################################
TradeData_filter_masterfundamental_FISDscrapefundamentalrating_mostrecentFISDrating<-
  fread("TRACE_EOD_TradeData_filter_masterfundamental_FISDscrapefundamentals_mostrecentFISDrating.csv")
###[3] Exclude unusural bonds
###[3-1] merge with extra variables in bond fundamentals.

FISD_fundamentals<-fread("Updated2020_FISD_Issue.csv")

setnames(FISD_fundamentals,tolower(colnames(FISD_fundamentals)))
setnames(FISD_fundamentals,
         c("complete_cusip"),
         c("cusip_id"))

###FISD unique by cusip_id and maturity or simply cusip_id
FISD_fundamentals[,`:=`(count_seq=sequence(.N)),
                  by=c("cusip_id","maturity")]
table(FISD_fundamentals$count_seq)

FISD_fundamentals_variablelist<-
  c("cusip_id", "bond_type",
    "putable", "private_placement","unit_deal", "exchangeable","redeemable","fungible",
    "preferred_security",
    "coupon_type")

FISD_fundamentals<-
  FISD_fundamentals[,c(FISD_fundamentals_variablelist),with=FALSE]

Trial<-
  left_join(TradeData_filter_masterfundamental_FISDscrapefundamentalrating_mostrecentFISDrating,
            FISD_fundamentals,
            by=c("cusip_id"))

Trial<-as.data.table(Trial)


table(Trial$putable)
table(Trial$exchangeable)
table(Trial$convertible)
table(Trial$cnvrb_fl)
Trial<-as.data.table(Trial)
nrow(Trial[bond_type %in%
             c("ABS","C10Y","C1Y","C2Y","C30Y","C3M","C5Y","C6M",
               "CCUR","CTBD","CTBL","EBON","EMTN","FGOV","FGS",
               "MBS","MUNI","O10Y","O13W","O26W","O2Y","O30Y",
               "O3Y","O4W","O52W","O5Y","O7Y","PS","PSTK","TPCS",
               "TXMU","USBD","USBL","USNT","USSI","USSP","USTC")])
table(Trial$rule_144a)
table(Trial$ind_144a)
table(Trial$private_placement)
table(Trial$preferred_security)
table(Trial$coupon_type)

###9% unusural bonds transactions (the reason we do the filtering one by one using %in% is to not exclude missing values)
Trial<-Trial[!(putable %in% c("Y"))]
Trial<-Trial[!(exchangeable %in% c("Y"))]
Trial<-Trial[!(convertible %in% c("Y"))]
Trial<-Trial[!(cnvrb_fl %in% c("Y"))]
Trial<-Trial[!(rule_144a %in% c("Y"))]
Trial<-Trial[!(preferred_security %in% c("Y"))]
Trial<-Trial[!(coupon_type %in% c("V"))]
Trial<-Trial[!(bond_type %in%
                 c("ABS","C10Y","C1Y","C2Y","C30Y","C3M","C5Y","C6M",
                   "CTBD","CTBL","EBON","EMTN","FGOV","FGS",
                   "MBS","MUNI","O10Y","O13W","O26W","O2Y","O30Y",
                   "O3Y","O4W","O52W","O5Y","O7Y","PS","PSTK","TPCS",
                   "TXMU","USBD","USBL","USNT","USSI","USSP","USTC"))]

nrow(Trial[putable=="Y"|
             exchangeable=="Y"|
             convertible=="Y"|rule_144a=="Y"|preferred_security=="Y"|coupon_type=="V"|
             (bond_type %in%
                c("ABS","C10Y","C1Y","C2Y","C30Y","C3M","C5Y","C6M",
                  "CTBD","CTBL","EBON","EMTN","FGOV","FGS",
                  "MBS","MUNI","O10Y","O13W","O26W","O2Y","O30Y",
                  "O3Y","O4W","O52W","O5Y","O7Y","PS","PSTK","TPCS",
                  "TXMU","USBD","USBL","USNT","USSI","USSP","USTC"))])/nrow(Trial)

print(table(Trial$new_mean_grade))

A<-Trial[(new_mean_grade=="")]
nrow(A[putable=="Y"|
         exchangeable=="Y"|
         convertible=="Y"|rule_144a=="Y"|preferred_security=="Y"|coupon_type=="V"|
         ind_144a=="Y"|private_placement=="Y"|
         (bond_type %in%
            c("ABS","C10Y","C1Y","C2Y","C30Y","C3M","C5Y","C6M",
              "CTBD","CTBL","EBON","EMTN","FGOV","FGS",
              "MBS","MUNI","O10Y","O13W","O26W","O2Y","O30Y",
              "O3Y","O4W","O52W","O5Y","O7Y","PS","PSTK","TPCS",
              "TXMU","USBD","USBL","USNT","USSI","USSP","USTC"))])

print(table(Trial$new_mean_grade))

#Oct 2020
#1.2% obs with missing time_to_offering
nrow(Trial[is.na(time_to_maturity)])/nrow(Trial)
#8.2% obs with missing time_to_offering
nrow(Trial[is.na(time_to_offering)])/nrow(Trial)
#8.2% obs with missing ins_amount_outstanding
nrow(Trial[is.na(ins_amount_outstanding)])/nrow(Trial)
#1.3% obs with NR credit rating
nrow(Trial[new_mean_grade==""])/nrow(Trial)

fwrite(Trial,
       "TRACE_EOD_TradeData_filter_masterfundamental_FISDscrapefundamentals_mostrecentFISDrating_dropunusuralbonds.csv")


###Update offering date, amount outstanding and credit ratings.
summary(Trial$time_to_maturity)
summary(Trial$time_to_offering)
summary(Trial$amount_outstanding)
#in summary statistics code, we use ins_amount_outstanding
summary(Trial$ins_amount_outstanding)
table(Trial$new_mean_grade)

CUSIP_list<-
  as.data.table(unique(Trial[is.na(time_to_offering)|
                               is.na(ins_amount_outstanding)|
                               (new_mean_grade=="")|
                               (is.na(new_mean_grade)),
                             c("cusip_id","bsym"),
                             with=FALSE]))

#############################################################################
###[5] to deal with missing bond, further use additional data on fundamentals from bloomberg
###(Optional)

###"Part3_issue_date_amt_outstanding_rating_0607_full_CUSIPlist0521.csv" is the bloomberg data on 
###outstanding amounts and first settlement date for cusips with missing fundamentals in FISD dataset.
issue_date_amt_outstanding_secondpart_0607<-
  fread("Part3_issue_date_amt_outstanding_rating_0607_full_CUSIPlist0521.csv")

issue_date_amt_outstanding_secondpart_0607<-
  issue_date_amt_outstanding_secondpart_0607[,c("V1","AMT_OUTSTANDING","FIRST_SETTLE_DT","RTG_SP"),with=FALSE]

setnames(issue_date_amt_outstanding_secondpart_0607,
         c(colnames(issue_date_amt_outstanding_secondpart_0607)),
         c("cusip_id","amount_outstanding_usd","first_settle_date","sp_rating"))
issue_date_amt_outstanding_secondpart_0607<-
  as.data.table(issue_date_amt_outstanding_secondpart_0607)

issue_date_amt_outstanding_secondpart_0607[,
                                           `:=`(amountoutstanding_thousand_bb_part2=as.numeric(amount_outstanding_usd)/1000,
                                                first_settle_date_strf=
                                                  strftime(as.Date(first_settle_date, format="%m/%d/%Y"), format="%m/%d/%Y"))]
nrow(issue_date_amt_outstanding_secondpart_0607)
issue_date_amt_outstanding_secondpart_0607<-
  issue_date_amt_outstanding_secondpart_0607[!is.na(first_settle_date_strf)]
nrow(issue_date_amt_outstanding_secondpart_0607[is.na(first_settle_date_strf)])
nrow(issue_date_amt_outstanding_secondpart_0607)

issue_date_amt_outstanding_secondpart_0607[
  ,
  `:=`(first_settle_date_strf_year=substr(first_settle_date_strf,7,8))
]
issue_date_amt_outstanding_secondpart_0607[
  (first_settle_date_strf_year>"20"),
  `:=`(first_settle_date_strf_year_full=paste0("19",first_settle_date_strf_year))
]
issue_date_amt_outstanding_secondpart_0607[
  (first_settle_date_strf_year<="20"),
  `:=`(first_settle_date_strf_year_full=paste0("20",first_settle_date_strf_year))
]

nrow(issue_date_amt_outstanding_secondpart_0607)
issue_date_amt_outstanding_secondpart_0607<-
  issue_date_amt_outstanding_secondpart_0607[(nchar(first_settle_date_strf_year_full)==4)]
nrow(issue_date_amt_outstanding_secondpart_0607)

issue_date_amt_outstanding_secondpart_0607[,
                                           `:=`(offering_date_bb_part2=
                                                  paste0(first_settle_date_strf_year_full,
                                                         "-",
                                                         substr(as.character(first_settle_date_strf),1,2),
                                                         "-",
                                                         substr(as.character(first_settle_date_strf),4,5)))]

table(issue_date_amt_outstanding_secondpart_0607$sp_rating)
issue_date_amt_outstanding_secondpart_0607[
  ,
  `:=`(sp_rating_first3digits=substr(sp_rating,1,3),
       sp_rating_first1digits=substr(sp_rating,1,1))
]
issue_date_amt_outstanding_secondpart_0607[
  (sp_rating_first1digits=="A")|
    (sp_rating_first3digits=="BBB")|
    (sp_rating=="(P)BBB"),
  `:=`(grade_sp_rating_bb_part2="I")
]
issue_date_amt_outstanding_secondpart_0607[
  (sp_rating_first3digits=="NR/")|
    (sp_rating_first3digits=="#N/")|
    (sp_rating=="NR"),
  `:=`(grade_sp_rating_bb_part2="NR")
]
issue_date_amt_outstanding_secondpart_0607[
  is.na(grade_sp_rating_bb_part2),
  `:=`(grade_sp_rating_bb_part2="H")
]


issue_date_amt_outstanding_part2_to_merge<-
  issue_date_amt_outstanding_secondpart_0607[,c("cusip_id","amountoutstanding_thousand_bb_part2","offering_date_bb_part2",
                                                "grade_sp_rating_bb_part2"),with=FALSE]

issue_date_amt_outstanding_part2_to_merge[
  ,
  `:=`(count_seq=sequence(.N)),
  by=c("cusip_id")
]
table(issue_date_amt_outstanding_part2_to_merge$count_seq)
issue_date_amt_outstanding_part2_to_merge<-
  issue_date_amt_outstanding_part2_to_merge[count_seq==1]

Trial_update_bloomberg1_FISD_fundamentals_ratings_earlierdates_bloomberg2<-
  as.data.table(left_join(Trial_update_bloomberg1_FISD_fundamentals_ratings_earlierdates,
                          issue_date_amt_outstanding_part2_to_merge,
                          by=c("cusip_id")))

print(table(Trial_update_bloomberg1_FISD_fundamentals_ratings_earlierdates_bloomberg2$new_mean_grade))
print(summary(Trial_update_bloomberg1_FISD_fundamentals_ratings_earlierdates_bloomberg2$ins_amount_outstanding))
print(summary(Trial_update_bloomberg1_FISD_fundamentals_ratings_earlierdates_bloomberg2$time_to_offering))

Trial_update_bloomberg1_FISD_fundamentals_ratings_earlierdates_bloomberg2[is.na(ins_amount_outstanding),
                                                                          `:=`(ins_amount_outstanding=amountoutstanding_thousand_bb_part2)]

Trial_update_bloomberg1_FISD_fundamentals_ratings_earlierdates_bloomberg2[is.na(time_to_offering), 
                                                                          `:=`(time_to_offering=as.integer(as.Date(trd_exctn_dt_s)-
                                                                                                             as.Date(offering_date_bb_part2)))]

table(Trial_update_bloomberg1_FISD_fundamentals_ratings_earlierdates_bloomberg2$new_mean_grade)
Trial_update_bloomberg1_FISD_fundamentals_ratings_earlierdates_bloomberg2[
  (new_mean_grade==""),
  `:=`(new_mean_grade=grade_sp_rating_bb_part2)
]

table(Trial_update_bloomberg1_FISD_fundamentals_ratings_earlierdates_bloomberg2$new_mean_grade)
print(summary(Trial_update_bloomberg1_FISD_fundamentals_ratings_earlierdates_bloomberg2$ins_amount_outstanding))
print(summary(Trial_update_bloomberg1_FISD_fundamentals_ratings_earlierdates_bloomberg2$time_to_offering))


print(nrow(unique(Trial_update_bloomberg1_FISD_fundamentals_ratings_earlierdates_bloomberg2[is.na(ins_amount_outstanding),
                                                                                            c("cusip_id"),
                                                                                            with=FALSE])))


print(nrow(unique(Trial_update_bloomberg1_FISD_fundamentals_ratings_earlierdates_bloomberg2[is.na(time_to_offering),
                                                                                            c("cusip_id"),
                                                                                            with=FALSE])))
###total number of bonds
print(nrow(unique(Trial_update_bloomberg1_FISD_fundamentals_ratings_earlierdates_bloomberg2[,c("cusip_id"),with=FALSE])))  

fwrite(Trial_update_bloomberg1_FISD_fundamentals_ratings_earlierdates_bloomberg2,
       "Data_FilterMerge_TRACE_EOD.csv")





